*********************************************************************************
*                                                                               *
*                          Replication Garmann 2017     	        			*
*                                                                               *
*                           Preparing the Dataset                   		    *
*                                                                               *
*********************************************************************************

global beg_path `"C:\Users\JohnSmith\Dropbox\Replication_EF"'   
 
ssc install matchit, replace // installing command matchit 
ssc install freqindex, replace //  installing command freqindex


*Importing data from the communes (official data received from Hesse Statistics) 

cd `"${beg_path}\Data\Analysis 2B\"'

import excel  using `"New data on Hesse\Übersicht_BM_2020.xlsx"', sheet(Bürgermeisterwahlen) first clear

keep NR NAME STICHWAHL WAHLTAG WAHLBETEILIGUNG WAHLBERECHTIGTE WAEHLER UNG I
sort NAME
egen gem_grouped = group(NAME)
gen id = . 
replace id = gem_grouped if gem_grouped <309 
replace id = gem_grouped-1 if gem_grouped >309 
replace id = 427 if id == . 

*Merging with communes ids and kreis extracted from Garmann's data (to get information on kreis, not available in the data from Hesse Statistics) 

merge m:1 id using `"New data on Hesse\Geminde_kreis.dta"'
matchit gemeindename NAME 
list gemeindename NAME  if similscore < .5 // looks ok
list gemeindename NAME  if similscore < .9  & similscore > .5  // looks ok
drop _merge

*Preparing gemeindename for match with kreis (Landratswahlen)
replace kreis = "Odenwaldkreis" if id == 427 // New city since 2018, not in Garmann's data

sort kreis 
egen temp1 = group(kreis) // Grouping data by Kreis
fre temp1 kreis  // Checking the list of Kreis 
gen filter = inlist(temp1, 2, 4, 11, 19, 26) // "kreisfreie Stadt", not included in Landratswahlen, we need to put them at the end of the list
gen temp2 = kreis 
replace temp2 = "zzz" if filter == 1 // The "kreisfreie Stadt" will become a new kreis "zzz"  
sort temp2
egen kreis_groupped = group(temp2) 
sort kreis_groupped  kreis 
list kreis_groupped kreis // 21 + 1 kreis_groupped (can be matched)
drop temp*
save `"New data on Hesse\Übersicht_BM_2020_geminde.dta"', replace // This will be used as a base for the dataset of all local elections

* Preparation for the Landratswahlen
use `"New data on Hesse\Übersicht_BM_2020_geminde.dta"', replace
keep if kreis_groupped <22
bysort id: gen temp1 = _n 
keep if temp1 == 1 
drop temp1
fre id // 422 cities, that's correct (5 "kreisfreie Stadt" are removed)
keep id NAME gemeindename kreis kreis_groupped 
fre kreis
save `"New data on Hesse\geminde_base.dta"', replace // This will be merged with the original data on Landratswahlen

*Creation of the dataset of Landratswahlen
import excel  using `"New data on Hesse\Übersicht_BM_2020.xlsx"', sheet(Landratswahlen) first clear
keep NAME STICHWAHL WAHLTAG WAHLBETEILIGUNG WAHLBERECHTIGTE WAEHLER UNG I
gen kreis = NAME
drop NAME 
sort kreis 
egen kreis_groupped = group(kreis)
joinby kreis_groupped using `"New data on Hesse\geminde_base.dta"' // Worked 132 Bergstrase
gen Election_type = "Landratswahlen"
drop if WAHLTAG<21185 & NAME=="Oberzent, Stadt" // Oberzent was created only on 01/01/2018 (21185 in SIF), there were thus no pre-2018 kreis-level elections specific to this municipality 
save `"New data on Hesse\Landratswahlen_to_append.dta"', replace

*******  Preparing referenda 

* Preparation of the municipality base for referendums
use `"New data on Hesse\Übersicht_BM_2020_geminde.dta"', replace
bysort id: gen temp1 = _n 
keep if temp1 == 1 
drop temp1
fre id // 422 cities, that's correct (5 "kreisfreie Stadt" are removed)
keep id NAME gemeindename kreis kreis_groupped 
fre kreis
save `"New data on Hesse\geminde_base_2.dta"', replace // This will be merged with the Referendums (by contrast to geminde_base, we include "kreisfreie Stadt")


import excel using  `"New data on Hesse\Buergerentscheide_Stand_16022020.xlsx"', sheet(Version_for_upload) first clear
drop if Voted ==. 
 keep ANZAHL Stat_digit NAME WAHLTAG WAHLBERECHTIGTE Voted WAHLBETEILIGUNG Invalid Invalid_share Valid Yes Yes_share No No_share
gen Election_type = "Bürgerentscheide"
destring WAHLBERECHTIGTE , replace
*The following standardize municipalities' names
replace NAME = "Bad Homburg v. d. Höhe, Stadt" if NAME =="Bad Homburg v.d. Höhe, Stadt"
replace NAME = "Neu-Anspach, Stadt" if NAME =="Neu-Anspach"
replace NAME = "Pohlheim, Stadt" if NAME =="Pohlheim"
replace NAME = "Rimbach" if NAME =="Rimbach im Odenwald"
replace NAME = "Schwalbach am Taunus, Stadt" if NAME =="Schwalbach am Taunus"
replace NAME = "Wetter (Hessen), Stadt" if NAME == "Wetter (Hessen)"
merge m:1 NAME using `"New data on Hesse\geminde_base_2.dta"' // all matched, good 
keep if _merge == 3
save `"New data on Hesse\Burgerentscheide_to_append.dta"', replace

*Preparation of Garmann (we will use mostly for pre-2017 federal elections)

****************************************
* Merging all local elections together * 
****************************************

use `"New data on Hesse\Übersicht_BM_2020_geminde.dta"', clear // Municipal  
gen Election_type = "Burgermeisterwahlen"
append using `"New data on Hesse\Landratswahlen_to_append.dta"' // Kreis 
append using `"New data on Hesse\Burgerentscheide_to_append.dta"' // Local Referendum 
egen temp1 = group(NAME) // Check: That's correct, 427 municipalities 
fre temp1   
keep  NAME gemeindename STICHWAHL WAHLTAG WAHLBERECHTIGTE WAHLBETEILIGUNG kreis kreis_groupped Election_type // Keeping only essential variables

*Adapt to the format of Garmann's dataset 
rename (WAHLBETEILIGUNG STICHWAHL Election_type) (turnout runoff local_type)
gen e_date = WAHLTAG 
format e_date %1.0f // to see the dates as SIF

save `"New data on Hesse\all_local.dta"', replace // Saving the new combined file 


****************************
* Preparing Garmann's data * 
****************************
 
use Election_frequency.dta, clear
encode artderwahl, gen(artderwahlb) // recode as categorical Variable
replace datum = "27.1.2008" if datum == "27.01.2008" // for consistency (zeroes are not used for the other dates)  
encode datum, gen(election_fixed)
*Getting dates of the national elections as variables
gen e_date = date(datum, "DMY")

//// Building the dataset ////
append using `"New data on Hesse\all_local.dta"'
drop if e_date >= 19624 // We keep only election up to the Federal elections of 2013 (on 19623 in SIF) 

*Adding the European Election of 1989 
expand 2 if artderwahlb ==2 & jahr==1994, gen(exp)
di date("18/06/1989", "DMY") //  in Stata format  
replace e_date = 10761 if exp==1 
replace datum ="18/06/1989" if exp==1
replace turnout = . if exp==1 
drop exp

*Adding the Federal election of 1990
expand 2 if artderwahlb ==1 & jahr==1994, gen(exp)
di date("02/12/1990", "DMY") //  in Stata format  
replace e_date = 11293 if exp==1 
replace datum ="02/12/1990" if exp==1
replace turnout = . if exp==1  
drop exp

*Adding the state election of 1991: 
expand 2 if artderwahlb ==4 & jahr==1995, gen(exp)
di date("20/01/1991", "DMY") // 11342 in Stata format   
replace e_date = 11342 if exp==1 
replace datum = "20/01/1991" if exp==1
replace turnout = . if exp==1 
drop exp

*Adding the local election of 1993: 
expand 2 if artderwahlb ==3 & jahr==1997, gen(exp)
di date("07/03/1993", "DMY") // 11342 in Stata format   
replace e_date = 12119 if exp==1 
replace datum = "07/03/1993" if exp==1
replace turnout = . if exp==1 
drop exp


//// Checking Joint Elections ////

by e_date gemeindename, sort: gen nvals = _n == 1
count if nvals == 0 // 1220 joint elections
tab nvals jahr
sort gemeindename e_date 
*list gemeindename e_date WAHLTAG datum artderwahlb local_type  nvals

egen group = group(gemeindename e_date) 

*Organize election by importance
fre local_type
recode artderwahlb (3=4) (4=3), gen(rank)
replace rank = 5 if local_type == "Landratswahlen"
replace rank = 6 if local_type == "Burgermeisterwahlen"
replace rank = 7 if local_type == "Bürgerentscheide" 
label define rank 1 "Bundestagswahl" 2 "Europawahl" 3 "Landtagswahl" 4 "Kommunalwahl" 5 "Landratswahlen" 6 "Burgermeisterwahlen" 7 "Bürgerentscheide"
label values rank rank 
fre rank // 14077 municipality-election observations (with 426 municipalities, that's 33 per municipality on average) 

sort group rank 
quietly by group:  gen rank2 = cond(_N==1,0,_n)
recode rank2 (0 =.), generate (rankgroup) 
drop rank2 

* labels 
label var group "Groups by elections date and municipality"
label var rank "Rank based on the election-type hierarchy" // 
label var rankgroup "Rank within the group by rank"
 
/* joint elections */
* re-grouping joint elections
sum rankgroup // max 3
gen joint_el1 = rank[_n+1] if rankgroup == 1 & rankgroup[_n+1] == 2 & group == group[_n+1]
gen joint_el2 = rank[_n+2] if rankgroup == 1& rankgroup[_n+2] == 3& group == group[_n+2]

* labels 
lab val joint_el1 rank 
lab val joint_el2 rank 
label var joint_el1 "2nd simultaneous elections"

* delete 2nd and 3rd elections 
drop if rankgroup > 1 & rankgroup != . // 1220 observations deleted => this matches the 1220 joint election (observed above) 
fre joint_el1 joint_el2 // summary of 2nd and 3rd simultaneous elections 
list rank joint_el1 joint_el2 if joint_el1!=. // to see all combinations 

//// Calculating the index ////

*Creating variables with the dates of legislative elections 
fre e_date datum if rank==1 // the first election (1990) is not studied
gen date_leg_1 = 12707
gen date_leg_2 = 14149
gen date_leg_3 = 15605
gen date_leg_4 = 16697
gen date_leg_5 = 18167
gen date_leg_6 = 19623

*Generating variables measuring distance to legislative elections
forvalues i=1(1)6  { 
generate timetoleg_`i' = date_leg_`i' - e_date if e_date < date_leg_`i'
}


*Generationg contributions to the index 

forvalues i = 1/6 {
gen no_weight_5_cont_`i' = .
replace no_weight_5_cont_`i' = 1/(exp(timetoleg_`i'/365.25)) if timetoleg_`i'< 1826.25 & timetoleg_`i' > 0 // 1826.25 corresponds to 5 years 
}

*Aggregating the contributions 

forvalues i = 1/6 {
egen no_weight_5_`i' = sum(no_weight_5_cont_`i'), by(gemeindename)
}

*Generating Indexes

gen index_n_w_5 = .
la var index_n_w_5  "General Index - 5 years: no weight"
forvalues i = 1/6 {
	replace index_n_w_5 = no_weight_5_`i' if date_leg_`i'==e_date
	}
	
keep if artderwahlb == 1
drop if turnout ==.
keep gemeindename jahr index_n_w_5

save index_n_w_5_to_merge.dta, replace // Saving the index to merge it with Garmann's data


